import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Load the wine-review dataset. read_csv already returns a DataFrame, so no
# extra pd.DataFrame(...) wrapper is needed.
df = pd.read_csv('winemag-data_first150k.csv')
# Disabled exploratory code: the bare triple-quoted string below is an
# expression statement with no effect, used here to switch off the
# per-column value_counts() dumps without deleting them. It is not a
# docstring. To re-enable, remove the surrounding triple quotes.
"""
print('-----------------country-----------------')
print(df['country'].value_counts())
print('-----------------description-----------------')
print(df['description'].value_counts())
print('-----------------designation-----------------')
print(df['designation'].value_counts())
print('-----------------points-----------------')
print(df['points'].value_counts())
print('-----------------price-----------------')
print(df['price'].value_counts())
print('-----------------province-----------------')
print(df['province'].value_counts())
print('-----------------region_1-----------------')
print(df['region_1'].value_counts())
print('-----------------region_2-----------------')
print(df['region_2'].value_counts())
print('-----------------variety-----------------')
print(df['variety'].value_counts())
print('-----------------winery-----------------')
print(df['winery'].value_counts())
"""


# Summary report for the loaded table `df`.
print('-----------------Missing-----------------')
# Per-column count of missing (NaN) values.
print(df.isna().sum())
print('-----------------Quartile-----------------')
# Five-number summary (min, Q1, median, Q3, max) of each numeric column.
# The CSV appears to contain text columns as well (e.g. country,
# description); pandas >= 2.0 defaults numeric_only to False, which makes
# quantile() raise TypeError on such columns, so restrict to numeric
# columns explicitly. Older pandas versions accept the same keyword.
print(df.quantile([0, .25, 0.5, .75, 1], numeric_only=True))

# Disabled exploratory code: the bare triple-quoted string below is an
# expression statement with no effect, used to switch off the histogram /
# boxplot plots and the missing-value cleaning steps (drop rows lacking
# country/points/price, then fill the remaining NaNs with a placeholder).
# It is not a docstring. To re-enable, remove the surrounding triple quotes.
# NOTE(review): DataFrame.info() prints its report itself and returns None,
# so print(df.info()) would additionally print "None" -- confirm whether
# that is intended before re-enabling.
# NOTE(review): the fill value 'unknow' looks like a typo for 'unknown' --
# confirm before re-enabling.
"""
print('-----------------Histogram-----------------')
df['points'].hist()
plt.show()


df['price'].hist()
plt.show()


print('-----------------Boxplot-----------------')
df.boxplot('points')
plt.show()


df.boxplot('price')
plt.show()


print(df.info())
df1 = df.dropna(subset=['country','points','price'])
print(df1.info())
df2 = df1.fillna('unknow')
print(df2.info())

"""
